home *** CD-ROM | disk | FTP | other *** search
- /* $Copyright: $
- * Copyright (c) 1984, 1985, 1986 Sequent Computer Systems, Inc.
- * All rights reserved
- *
- * This software is furnished under a license and may be used
- * only in accordance with the terms of that license and with the
- * inclusion of the above copyright notice. This software may not
- * be provided or otherwise made available to, or used by, any
- * other person. No title to or ownership of the software is
- * hereby transferred.
- */
-
- /* $Header: /crg2/bruces2/rbk/C++/Presto/src/RCS/parallel.h,v 1.2 88/03/24 09:52:47 rbk Exp $
- *
- * parallel.h
- * Definitions for use in parallel, shared-memory programs.
- */
-
- /* $Log: parallel.h,v $
- *
- * 90/01/14 jef
- * HC_Spinlock fails sporadically when 16 processors are contending for a
- * lock. Visual inspection of code in hc_slock_asm.h SEEMS to indicate an
- * 'off-by-1' problem - the private lock data is in x [procid][16] in an
- * array indexed [p][16] (i.e., the private lock data is *ACTUALLY* dealt
- * with in x [procid +1 ][0]!!). Everything would still work fine, except
- * for processor 16, whose private data would be found one byte off the end
- * of the array (and when that byte changes value, processor 16 gets the
- * lock, even if it shouldn't have!). Modifying the hc_slock_t structure
- * to be [procs][17] seems to solve the problem - however, the real
- * problem is in the code and/or comments in hc_slock_asm.h.
- *
- * 89/12/28 jef
- * Add support for a second type of spinlock which works well in
- * high lock contention situations (after original by raj).
- *
- * Revision 1.2 88/03/24 09:52:47 rbk
- * #ifdef ns32000 around ALM fuss. Can't use system version of this file
- * since C++ doesn't understand asm-functions yet.
- *
- * Revision 1.1 88/03/22 15:26:42 rbk
- * Initial revision
- *
- */
-
- #ifndef parallel_h
- #define parallel_h
-
- /*
- * Number of processors supported by PRESTO.
- *
- * This is a compile-time limit. On i386 builds it also sizes the
- * high-contention lock: HC_S_LOCK_SIZE is NUMPROCS+1 rows.
- */
- #define NUMPROCS 16
-
- /*
- * Data structure for regular spinlock.
- * A "lock" is a byte of memory, initialized to zero (unlocked).
- * The two states are named L_UNLOCKED (0) and L_LOCKED (1) further
- * down in this file. slock_t is also the type of the b_mutex field
- * of sbarrier_t.
- */
- typedef unsigned char slock_t;
-
- #ifdef i386
- /*
- * Data structure for high-contention spinlock based on queueing.
- *
- * In a regular spinlock, all threads requesting the lock spin, performing
- * test-and-set operations on a single shared memory location. The
- * resulting cache invalidation traffic quickly saturates the bus with
- * only a small number of spinning processors, impacting the performance
- * of the thread holding the lock (by interfering with its ability to
- * access memory) and causing a rapid falloff in performance.
- *
- * The queue-based spinlock is designed to eliminate this problem. Threads
- * spin on a private location instead of a shared location, eliminating the
- * cache invalidation traffic. To release a lock, the thread holding the
- * lock simply sets the private location that the next waiter is spinning on
- * (in this case no atomic instruction is needed).
- *
- * Since a spinning thread does not relinquish its processor, the
- * lock data structure need only support NUMPROCS threads. In this
- * implementation, the DYNIX process id (from getpid ()) of the requesting
- * thread is used to index into the lock data structure. Processor ids
- * are converted to range from 1 to NUMPROCS (as opposed to 0 to NUMPROCS-1
- * as in PRESTO). Row 0 of the array is reserved for use in indicating the
- * identity of the last requestor of the lock. It also stores a copy of
- * the last requestor's private data area.
- *
- * The private location which each processor spins on while it waits to
- * acquire the lock is (supposedly) located at x [last_procid] [15], i.e.,
- * it spins on the private location of the last processor which requested
- * the lock (NOTE: from the code in hc_slock_asm.h, I believe this
- * location is ACTUALLY x [last_procid] [*16*], which is why I increased
- * the length of each row to 17 (jef)). I believe that the extra
- * 15 bytes in each row are used to force each private location into a
- * separate cache line on the 386, thus eliminating extra cache coherency
- * traffic when a private location is set by a releasing lock holder.
- *
- * The structure is declared with both a struct tag and a typedef name so
- * that the type can be written hc_slock_t in C as well as in C++.
- *
- * See comments in hc_slock_asm.h for more details.
- */
-
- #define HC_S_LOCK_SIZE (NUMPROCS+1)
- typedef struct hc_slock_t { unsigned char x[HC_S_LOCK_SIZE][17]; } hc_slock_t;
-
- #endif /* i386 */
-
- #define L_UNLOCKED 0 /* lock byte value: free */
- #define L_LOCKED 1 /* lock byte value: held */
-
- /*
- * Was a conditional lock request granted (L_SUCCESS) or denied (L_FAILED)?
- * NOTE(review): the routine that returns these values is not declared in
- * this header; presumably a conditional-lock call defined elsewhere --
- * confirm.
- */
- #define L_FAILED 0
- #define L_SUCCESS 1
-
- /*
- * A "barrier" allows multiple processes to synchronize by having
- * all of them exit the barrier construct simultaneously.
- *
- * This version assumes <= 255 processes, fits in one 4-byte integer,
- * and is based on spin-locks.
- *
- * Each of the four fields is a single byte (slock_t is an unsigned
- * char), which is where both the 4-byte size and the 255-participant
- * ceiling come from: b_limit and b_count cannot hold more than 255.
- * The routines that initialize and wait on a barrier are not declared
- * in this header.
- */
-
- typedef struct {
- slock_t b_mutex; /* mutual exclusion */
- unsigned char b_limit; /* number of participants */
- unsigned char b_count; /* state counter */
- unsigned char b_useno; /* current use # (state flag) */
- } sbarrier_t; /* 's' for "spin"-barrier */
-
-
- #ifndef c_plusplus
- /*
- * Other useful declarations.
- *
- * Old-style (unprototyped) declarations of allocation routines, each
- * returning char *. They are skipped when c_plusplus is defined.
- * NOTE(review): shsbrk() and shmalloc() look like shared-memory
- * counterparts of sbrk() and malloc() -- confirm against the library.
- */
-
- extern char *sbrk(), *shsbrk();
- extern char *shmalloc();
-
- #endif /* !c_plusplus */
-
- #ifdef ns32000
- /*
- * ALM_HASH() is used to hash an address to an ALM offset.
- *
- * The argument is an lvalue, not a pointer: the macro takes its address
- * itself. The mask (0xFF << 2) keeps address bits 2 through 9, so the
- * result is a multiple of 4 in the range 0..1020. That matches NBALM
- * (4 bytes per ALM) and stays below ALMSIZE (1024), both defined later
- * in this file. The address is converted to int, which assumes a
- * pointer fits in an int.
- */
-
- #define ALM_HASH(x) ((int)(&(x)) & (0xFF << 2))
-
- /*
- * S_LOCK() and S_UNLOCK() provide in-line access to locks for C-programs;
- * these can be used in time-critical situations, at a cost in code size.
- *
- * S_LOCK(lp) takes a pointer to a lock byte and loops until it owns it:
- * 1. spin until the byte at lp is no longer L_LOCKED;
- * 2. spin on the ALM gate selected by ALM_HASH() of the lock until
- * the gate reads as not ALM_LOCKED;
- * 3. re-test the lock byte; if it is L_UNLOCKED, set it to L_LOCKED,
- * store ALM_UNLOCKED to the gate and stop; otherwise store
- * ALM_UNLOCKED to the gate and start again from step 1.
- * NOTE(review): the macro never stores ALM_LOCKED, so reading the gate
- * presumably claims it as a hardware side effect -- confirm against the
- * ALM documentation.
- *
- * lp is evaluated several times, so it must not have side effects. The
- * expansion is a bare { } block rather than a single statement, so
- * "if (c) S_LOCK(p); else ..." does not parse; brace such uses.
- * S_LOCK() also needs _alm_base, ALM_LOCKED and ALM_UNLOCKED to be in
- * scope at the point of use.
- *
- * CAREFUL using S_LOCK(): cc -O -i doesn't do S_LOCK(&x) correctly; need
- * to pass pointer to lock, not address of lock.
- */
-
- #define S_LOCK(lp) { \
- register char *lock_alm = &_alm_base[ALM_HASH(*(lp))]; \
- for (;;) { \
- /* Wait for lock to be available */ \
- while (*(lp) == L_LOCKED) \
- continue; \
- /* Grab ALM gate for atomic access to lock */ \
- while (*lock_alm & ALM_LOCKED) \
- continue; \
- /* Can race with others trying to get the lock */ \
- if (*(lp) == L_UNLOCKED) { \
- /* No race (or won it) -- grab the lock */ \
- *(lp) = L_LOCKED; \
- *lock_alm = ALM_UNLOCKED; \
- break; \
- } \
- /* Lost race, try again */ \
- *lock_alm = ALM_UNLOCKED; \
- } \
- }
-
- #define S_UNLOCK(lp) (*(lp) = L_UNLOCKED) /* release: a plain store of L_UNLOCKED, no ALM gate taken */
-
- /*
- * Various implementation dependent parameters.
- *
- * ALM_UNLOCKED and ALM_LOCKED are the gate values tested and stored by
- * S_LOCK(). NBALM and ALMSIZE bound the offsets ALM_HASH() can produce
- * (multiples of 4, below 1024).
- */
-
- #define NBALM 4 /* number bytes per ALM */
- #define ALMSIZE 1024 /* size of our portion of ALMs */
-
- #define ALM_UNLOCKED 0 /* gate value: free */
- #define ALM_LOCKED 1 /* gate value: held */
-
- #define NALMDEVS 64 /* # different ALM's to try for */
-
- #define ADDR_RND 0x800 /* boundary round (text/data/end) */
-
- #endif /* ns32000 */
-
- /*
- * Convenience definitions.
- *
- * NULL is supplied here only if no earlier header has already defined it.
- */
-
- #ifndef NULL
- #define NULL 0
- #endif
-
- /*
- * Various globally used data.
- *
- * These are declarations only; the objects are defined elsewhere.
- * _alm_base exists only on ns32000 builds, where S_LOCK() indexes it.
- * NOTE(review): errno is declared directly instead of through <errno.h>;
- * that is only valid where errno is a plain int object -- confirm for
- * any new target.
- */
-
- extern int errno;
-
- extern int _shm_fd; /* fd for shared data mapped file */
- #ifdef ns32000
- extern char *_alm_base; /* virt addr of mapped ALM's */
- #endif /* ns32000 */
- extern int _pgoff; /* getpagesize() - 1 */
-
- /*
- * PGRND() rounds up a value to next page boundary.
- *
- * The result has type char *. The macro relies on _pgoff holding
- * getpagesize() - 1, i.e. a mask of the low address bits, so a value that
- * is already page aligned comes back unchanged. The argument is converted
- * to int, which assumes a pointer fits in an int.
- *
- * The whole expansion is parenthesized so the cast cannot bind to
- * neighbouring operators (for example a following [] or a preceding
- * sizeof) at the point of use.
- */
-
- #define PGRND(x) ((char *) (((int)(x) + _pgoff) & ~_pgoff))
-
-
- #endif /* parallel_h */
-